UNEMPLOYMENT ANALYSIS WITH PYTHON
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the unemployment dataset. Every later cell refers to the frame as `df`,
# so bind that name here; `data` is kept as an alias of the same object.
df = pd.read_csv(r"C:\\Users\\trupti kadam\\Downloads\\Data Task 2.csv")
data = df
df
| | Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.740 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.740 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.740 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.740 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.740 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 262 | West Bengal | 30-06-2020 | M | 7.29 | 30726310 | 40.39 | East | 22.9868 | 87.855 |
| 263 | West Bengal | 31-07-2020 | M | 6.83 | 35372506 | 46.17 | East | 22.9868 | 87.855 |
| 264 | West Bengal | 31-08-2020 | M | 14.87 | 33298644 | 47.48 | East | 22.9868 | 87.855 |
| 265 | West Bengal | 30-09-2020 | M | 9.35 | 35707239 | 47.73 | East | 22.9868 | 87.855 |
| 266 | West Bengal | 31-10-2020 | M | 9.98 | 33962549 | 45.63 | East | 22.9868 | 87.855 |
267 rows × 9 columns
# Preview the first 100 rows of the dataset.
df.head(n=100)
| | Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.7400 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.7400 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.7400 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.7400 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.7400 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | Jammu & Kashmir | 31-07-2020 | M | 10.88 | 3558889 | 38.03 | North | 33.7782 | 76.5762 |
| 96 | Jammu & Kashmir | 31-08-2020 | M | 11.09 | 3429950 | 36.66 | North | 33.7782 | 76.5762 |
| 97 | Jammu & Kashmir | 30-09-2020 | M | 16.17 | 3210281 | 36.31 | North | 33.7782 | 76.5762 |
| 98 | Jammu & Kashmir | 31-10-2020 | M | 16.14 | 3106691 | 35.05 | North | 33.7782 | 76.5762 |
| 99 | Jharkhand | 31-01-2020 | M | 10.61 | 10198029 | 42.92 | East | 23.6102 | 85.2799 |
100 rows × 9 columns
# Count the missing entries in each column.
df.isna().sum()
Region 0 Date 0 Frequency 0 Estimated Unemployment Rate (%) 0 Estimated Employed 0 Estimated Labour Participation Rate (%) 0 Region.1 0 longitude 0 latitude 0 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
summary_stats = df.describe()
summary_stats
| | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | longitude | latitude |
|---|---|---|---|---|---|
| count | 267.000000 | 2.670000e+02 | 267.000000 | 267.000000 | 267.000000 |
| mean | 12.236929 | 1.396211e+07 | 41.681573 | 22.826048 | 80.532425 |
| std | 10.803283 | 1.336632e+07 | 7.845419 | 6.270731 | 5.831738 |
| min | 0.500000 | 1.175420e+05 | 16.770000 | 10.850500 | 71.192400 |
| 25% | 4.845000 | 2.838930e+06 | 37.265000 | 18.112400 | 76.085600 |
| 50% | 9.650000 | 9.732417e+06 | 40.390000 | 23.610200 | 79.019300 |
| 75% | 16.755000 | 2.187869e+07 | 44.055000 | 27.278400 | 85.279900 |
| max | 75.850000 | 5.943376e+07 | 69.690000 | 33.778200 | 92.937600 |
# Pairwise correlation between the numeric features of this dataset.
# The frame also holds text columns (Region, Date, Frequency, Region.1);
# numeric_only=True skips them. Since pandas 2.0 the default is False, which
# makes a bare df.corr() raise on those columns.
df.corr(numeric_only=True)
| | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | longitude | latitude |
|---|---|---|---|---|---|
| Estimated Unemployment Rate (%) | 1.000000 | -0.245176 | -0.073540 | 0.149976 | -0.023976 |
| Estimated Employed | -0.245176 | 1.000000 | -0.047948 | -0.113664 | -0.119321 |
| Estimated Labour Participation Rate (%) | -0.073540 | -0.047948 | 1.000000 | 0.080372 | 0.397836 |
| longitude | 0.149976 | -0.113664 | 0.080372 | 1.000000 | 0.125895 |
| latitude | -0.023976 | -0.119321 | 0.397836 | 0.125895 | 1.000000 |
# Visual representation of the correlation between the numeric features.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names were later removed, so use whichever variant this install provides.
whitegrid_style = next(
    (
        name
        for name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
        if name in plt.style.available
    ),
    "default",
)
plt.style.use(whitegrid_style)
plt.figure(figsize=(14, 12))
# numeric_only=True: the frame contains text columns that corr() cannot use
# (and raises on under pandas >= 2.0 defaults).
sns.heatmap(df.corr(numeric_only=True))
plt.show()
# Unemployment rate according to the different regions of India.
# Columns are relabelled by position, so this list must stay in the same
# order (and have the same length) as the columns of the loaded frame.
renamed_columns = [
    "States",
    "Date",
    "Frequency",
    "Estimated Unemployment Rate",
    "Estimated Employed",
    "Estimated Labour Participation Rate",
    "Region",
    "longitude",
    "latitude",
]
df.columns = renamed_columns

# Histogram of the unemployment rate, one colour per region.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_title("Indian Unemployment")
sns.histplot(data=df, x="Estimated Unemployment Rate", hue="Region", ax=ax)
plt.show()
# Grid of pairwise scatter plots / distributions for the frame's columns.
sns.pairplot(data=df)
<seaborn.axisgrid.PairGrid at 0x16dfa9f4310>
# Sunburst chart: regions on the inner ring, states on the outer ring, with
# wedge size taken from the "Estimated Unemployment Rate" values.
# Relies on the renamed column labels ("States", "Region", ...) set on df.
unemploment = df[["States", "Region", "Estimated Unemployment Rate"]]
figure = px.sunburst(
    unemploment,
    path=["Region", "States"],
    values="Estimated Unemployment Rate",
    width=500,
    height=500,
    # Was "RdY1Gn" (digit one), which is not a Plotly colour scale name;
    # the built-in red-yellow-green scale is spelled "RdYlGn".
    color_continuous_scale="RdYlGn",
    title="Unemployment Rate in India",
)
figure.show()
This completes the analysis of the unemployment rate using the Python programming language.